This document demonstrates the use of the bRF and LASSO-D3S functions for integrative GRN inference.
Those functions infer the regulatory pathways of Arabidopsis thaliana’s roots in response to nitrate (N) induction from Varala et al., 2018.
They use as inputs the expression profiles of N-responsive genes and TFBS information. Prior TFBS information was built by searching in the promoters of the N-responsive genes the PWM of the N-responsive regulators.
load('rdata/inference_input_N_response_varala.rdata')
genes <- input_data$grouped_genes; length(genes)
## [1] 1426
tfs <- input_data$grouped_regressors; length(tfs)
## [1] 201
counts <- input_data$counts; dim(counts)
## [1] 1426 45
load("rdata/pwm_occurrences_N_response_varala.rdata")
dim(pwm_occurrence)
## [1] 1426 201
mean_expr <- rowMeans(counts)
var_expr <- matrixStats::rowSds(counts)*matrixStats::rowSds(counts)
How does the individual MSE of genes varies with \(\alpha\) for model estimation in GRN inferences?
# ALPHAS <- seq(0,1, by = 0.1)
# lmses <- data.frame(row.names = genes)
#
# for(alpha in ALPHAS){
# set.seed(121314)
# lmses[,as.character(alpha)] <- bRF_inference_MSE(counts, genes, tfs, alpha = alpha, nTrees = 4000,
# pwm_occurrence = pwm_occurrence, nCores = 18)
# }
#
# save(lmses, file = "results/logMSES_RFs_per_genes.rdata")
Clustering the genes :
load(file = "results/logMSES_RFs_per_genes_400trees.rdata")
mse_rf <- (lmses-rowMeans(lmses)) / matrixStats::rowSds(as.matrix(lmses))
ha = HeatmapAnnotation(
alpha = anno_simple(as.numeric(rep(colnames(mse_rf),1))),
annotation_name_side = "left")
heatmap_rf_4000 <- Heatmap(mse_rf, row_km = 3, cluster_columns = F, show_row_names = F,
name = "scaled logMSE (z-score)", top_annotation = ha,
row_title = "Genes", column_title = "alpha");heatmap_rf_4000
# lmses_lasso <- data.frame(row.names = genes)
#
# for(alpha in ALPHAS){
# set.seed(121314)
# lmses_lasso[,as.character(alpha)] <- LASSO.D3S_inference_MSE(counts, genes, tfs,
# normalized = TRUE,
# alpha = alpha, N = 50,
# pwm_occurrence = pwm_occurrence, nCores = 15)
# }
#
# save(lmses_lasso, file = "results/logMSES_Lasso_per_genes.rdata")
load(file = "results/logMSES_Lasso_per_genes.rdata")
mse_lasso <- (lmses_lasso-rowMeans(lmses_lasso)) / matrixStats::rowSds(as.matrix(lmses_lasso))
heatmap_lasso <- Heatmap(mse_lasso, row_km = 4, cluster_columns = F, show_row_names = F,
name = "scaled logMSE (z-score)", top_annotation = ha,
row_title = "Genes", column_title = "alpha"); heatmap_lasso
average_genes <- function(mse){
new_mse <- mse
new_mse[,"0-0.2"] <- rowMeans(new_mse[c("0", "0.1", "0.2")])
new_mse[,"0.3-0.6"] <- rowMeans(new_mse[c("0.3", "0.4", "0.5", "0.6")])
new_mse[,"0.7-1"] <- rowMeans(new_mse[c("0.7", "0.8", "0.9", "1")])
return(new_mse[c("0-0.2", "0.3-0.6", "0.7-1")])
}
smooth_rf <- average_genes(lmses)
smooth_lasso <- average_genes(lmses_lasso)
get_optimum <- function(gene, method = "rf"){
if(method=="rf")
return(names(which.min(smooth_rf[gene,])))
if(method=="lasso")
return(names(which.min(smooth_lasso[gene,])))
}
gene <- rownames(smooth_rf)[1]
clusters_rf <- sapply(genes, get_optimum, method = "rf")
clusters_lasso <- sapply(genes, get_optimum, method = "lasso")
table(clusters_rf); table(clusters_lasso)
## clusters_rf
## 0-0.2 0.3-0.6 0.7-1
## 859 295 272
## clusters_lasso
## 0-0.2 0.3-0.6 0.7-1
## 990 279 157
Heatmap(mse_rf,
row_km = 3, cluster_columns = F, show_row_names = F,
name = "MSE/Var", top_annotation = ha,
row_title = "Genes")+ rowAnnotation(
clusters_rf = clusters_rf,
col=list(clusters_rf= setNames(c("darkorange", "yellow", "green"),
nm = names(table(clusters_rf))) ))
Heatmap(mse_lasso,
row_km = 3, cluster_columns = F, show_row_names = F,
name = "MSE/Var", top_annotation = ha,
row_title = "Genes")+ rowAnnotation(
clusters_lasso = clusters_lasso,
col=list(clusters_lasso= setNames(c("darkorange", "yellow", "green"),
nm = names(table(clusters_lasso))) ))
Nombre de motifs dans le promoteur, niveau d’expression, ontologies, sont-ils des TFs? leur taille?
Quelle est la précision rappel des sous réseaux concernant ces gènes.
load("rdata/pwm_prom_jaspar_dap.rdata")
library(patchwork)
genes_info <- data.frame(genes = genes,
cluster_lasso = clusters_lasso[genes],
cluster_rf = clusters_rf[genes])
genes_info$is_tf <- genes %in% tfs
genes_info$var <- var_expr
genes_info$expr <- mean_expr
genes_info$nb_motifs <- table(pwm_prom$target)[genes]
genes_info%>%
ggplot(aes(x=cluster_rf, y=nb_motifs)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Number of motifs in promoter for RF groups")) +
stat_compare_means() + genes_info%>%
ggplot(aes(x=cluster_lasso, y=nb_motifs)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Number of motifs in promoter for LASSO groups")) +
stat_compare_means()
## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.
## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.
genes_info%>%
ggplot(aes(x=cluster_rf, y=var)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Gene variance for RF groups")) + scale_y_log10()+
stat_compare_means()+
genes_info%>%
ggplot(aes(x=cluster_lasso, y=var)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Gene variance for LASSO groups")) + scale_y_log10()+
stat_compare_means()
genes_info%>%
ggplot(aes(x=cluster_rf, y=expr)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Gene expression for RF groups")) + scale_y_log10()+
stat_compare_means()+
genes_info%>%
ggplot(aes(x=cluster_lasso, y=expr)) +
geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
geom_boxplot(width=0.1, fill = "white")+
ggtitle(("Gene expression for LASSO groups")) + scale_y_log10()+
stat_compare_means()
genes_info %>%
group_by(cluster_rf) %>%
summarise(n=n(),
tf_frac=sum(is_tf)/n()) %>%
ggplot(aes(x=cluster_rf, y=tf_frac,
label = paste("n=",n))) +
geom_bar(stat = "identity", aes(fill=tf_frac), alpha = 1)+
geom_hline(yintercept = length(tfs)/length(genes)) +
geom_text(y=0.2) + xlab("cluster RF") +
ggtitle("Fraction of TFs in RF groups")+ylim(c(0,0.2))+
genes_info %>%
group_by(cluster_lasso) %>%
summarise(n=n(),
tf_frac=sum(is_tf)/n()) %>%
ggplot(aes(x=cluster_lasso, y=tf_frac,
label = paste("n=",n))) +
geom_bar(stat = "identity", aes(fill=tf_frac), alpha = 1)+
geom_hline(yintercept = length(tfs)/length(genes)) +
geom_text(y=0.2) + xlab("cluster LASSO") +ylim(c(0,0.2))+
ggtitle("Fraction of TFs in LASSO groups")
Est-ce qu’on a significativement plus de TFs dans les genes qui sont
bien prédits avec les PWM chez le lasso?
fisher.test(matrix(c(length(genes), length(tfs), 157, 157*0.1910828),
nrow = 2), alternative = "greater")
##
## Fisher's Exact Test for Count Data
##
## data: matrix(c(length(genes), length(tfs), 157, 157 * 0.1910828), nrow = 2)
## p-value = 0.09628
## alternative hypothesis: true odds ratio is greater than 1
## 95 percent confidence interval:
## 0.9255823 Inf
## sample estimates:
## odds ratio
## 1.355392
load("results/networks_bRF_lasso.rdata")
edges <- edges[str_detect(names(edges), "0.075|0.05|0.01")]
color_palette = c( "#EC5D2F","#FE945C", "#FFC08E" )
settings <- c("method", "alpha", "density", "rep")
prettyZero <- function(l){
max.decimals = max(nchar(str_extract(l, "\\.[0-9]+")), na.rm = T)-1
lnew = formatC(l, replace.zero = T, zero.print = "0",
digits = max.decimals, format = "f", preserve.width=T)
return(lnew)}
get_precision <- function(group, method){
if(method == "lasso")
genes <- names(which(clusters_lasso == group))
if(method == "rf")
genes <- names(which(clusters_rf == group))
subset <- lapply(edges, function(net){net[net$to %in% genes,]})
val_conn <-
evaluate_networks(
subset,
validation = c("TARGET", "CHIPSeq", "DAPSeq"),
nCores = 10,
input_genes = genes,
input_tfs = tfs
)
val_conn[, settings] <-
str_split_fixed(val_conn$network_name, '_', length(settings))
val_conn$group <- group
val_conn$method_mse <- method
if(method == "rf")
val_conn<-val_conn[val_conn$method != "LASSO-D3S",]
if(method == "lasso")
val_conn<-val_conn[val_conn$method == "LASSO-D3S",]
return(val_conn)
}
precision <- data.frame()
for(group in unique(clusters_lasso)){
for(method in c("rf", "lasso")){
if(nrow(precision)==0)
precision<- get_precision(group, method)
else
precision <- rbind.data.frame(precision, get_precision(group, method))
}
}
##
## Parallel network validation with AranetBench Using 10 cores.
## 4.225 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 3.665 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 7.044 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 9.502 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 5.585 sec elapsed
##
## Parallel network validation with AranetBench Using 10 cores.
## 5.274 sec elapsed
(precision %>%
dplyr::select(-network_name) %>%
mutate(alpha = as.numeric(alpha)) %>%
ggplot(aes(color = density, x = alpha, y = precision)) +
ggh4x::facet_nested_wrap(vars(method, group), ncol = 8, nest_line = T) + geom_point() +
geom_smooth(aes(fill = density), alpha = 0.1) +
theme(
strip.background = element_blank(),
axis.title.x = element_text(size = 22),
title = element_text(size = 16),
strip.text = element_text(size = 16),
legend.text = element_text(size = 15),
axis.text = element_text(size = 12),
legend.position = 'top'
) +
xlab(expression(alpha)) + ylab("Precision") +
ggtitle("Precision against ConnecTF") +
scale_x_continuous(labels = prettyZero) +
scale_color_manual(name = "Network density", values = color_palette) +
scale_fill_manual(name = "Network density", values = color_palette))/(
precision %>%
dplyr::select(-network_name) %>%
mutate(alpha = as.numeric(alpha)) %>%
ggplot(aes(color = density, x = alpha, y = recall)) +
ggh4x::facet_nested_wrap(vars(method, group), ncol = 8, nest_line = T) + geom_point() +
geom_smooth(aes(fill = density), alpha = 0.1) +
theme(
strip.background = element_blank(),
axis.title.x = element_text(size = 22),
title = element_text(size = 16),
strip.text = element_text(size = 16),
legend.text = element_text(size = 15),
axis.text = element_text(size = 12),
legend.position = 'none'
) +
xlab(expression(alpha)) + ylab("Recall") +
ggtitle("Recall against ConnecTF") +
scale_x_continuous(labels = prettyZero) +
scale_color_manual(name = "Network density", values = color_palette) +
scale_fill_manual(name = "Network density", values = color_palette))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
library(DIANE)
##
## Attaching package: 'DIANE'
## The following object is masked from 'package:igraph':
##
## normalize
background <- convert_from_agi(genes)
##
for(group in unique(clusters_lasso)){
genes_i <- names(which(clusters_lasso == group))
print(paste("LASSO", length(genes_i), "genes,", group, "\n"))
genes_i <- convert_from_agi(genes_i)
go_lasso <- enrich_go(genes_i, background)
DIANE::draw_enrich_go(go_lasso)
print(go_lasso)
genes_i <- names(which(clusters_rf == group))
print(paste("RF", length(genes_i), "genes, min mse at", group))
genes_i <- convert_from_agi(genes_i)
go_rf <- enrich_go(genes_i, background)
DIANE::draw_enrich_go(go_rf)
print(go_rf)
# common_go <- intersect(go_lasso$ID, go_rf$ID)
# print(go_lasso[common_go,])
}
## [1] "LASSO 157 genes, 0.7-1 \n"
##
## [1] ID Description GeneRatio BgRatio pvalue p.adjust
## [7] qvalue geneID Count
## <0 rows> (or 0-length row.names)
## [1] "RF 272 genes, min mse at 0.7-1"
## ID Description GeneRatio
## GO:0050896 GO:0050896 response to stimulus 98/225
## GO:0042221 GO:0042221 response to chemical 68/225
## GO:0051716 GO:0051716 cellular response to stimulus 55/225
## GO:0010033 GO:0010033 response to organic substance 48/225
## GO:0070887 GO:0070887 cellular response to chemical stimulus 40/225
## GO:0009719 GO:0009719 response to endogenous stimulus 35/225
## GO:0007165 GO:0007165 signal transduction 35/225
## GO:0009725 GO:0009725 response to hormone 34/225
## GO:0071310 GO:0071310 cellular response to organic substance 28/225
## GO:0033993 GO:0033993 response to lipid 24/225
## GO:0071495 GO:0071495 cellular response to endogenous stimulus 21/225
## GO:0032870 GO:0032870 cellular response to hormone stimulus 20/225
## GO:1901698 GO:1901698 response to nitrogen compound 13/225
## BgRatio pvalue p.adjust qvalue
## GO:0050896 401/1192 3.742682e-04 0.033414812 0.030127501
## GO:0042221 227/1192 4.162561e-06 0.001973054 0.001778947
## GO:0051716 203/1192 1.015654e-03 0.040118342 0.036171545
## GO:0010033 147/1192 1.321293e-05 0.002843887 0.002564108
## GO:0070887 120/1192 4.754524e-05 0.005634111 0.005079834
## GO:0009719 111/1192 5.325665e-04 0.033414812 0.030127501
## GO:0007165 115/1192 1.120333e-03 0.040849055 0.036830371
## GO:0009725 107/1192 5.498441e-04 0.033414812 0.030127501
## GO:0071310 70/1192 1.799928e-05 0.002843887 0.002564108
## GO:0033993 69/1192 9.332992e-04 0.040118342 0.036171545
## GO:0071495 56/1192 6.344585e-04 0.033414812 0.030127501
## GO:0032870 52/1192 5.850858e-04 0.033414812 0.030127501
## GO:1901698 28/1192 7.056834e-04 0.033449395 0.030158681
## geneID
## GO:0050896 AtNIGT1/AtGDPD2/NA/STY46/ATBCA4/DIN10/ATMPK18/ATRLP57/B73/TGA1/AtbZIP63/PIP2;3/KFB01/AKS3/UGT76B1/ABF3/ARR9/ATWL2/ASG1/ARR4/ATNRT3.1/MRF1/NA/ABS3/ATGA3OX1/COP3/NA/ACH1/STOP2/ATSUC1/ATTLP9/CAR9/ATWRKY18/GBF3/5PTASE2/NA/SDI2/NA/NA/DHAR2/AtATL15/MIZ1/HYH/ACO2/GIS3/ATSEN1/ARR3/ATIRT3/EMB2728/ATPLC4/VQ18/LSU3/ATGLR2.2/FAR4/NA/RHA2A/NA/RHA2B/NA/NA/RFI2/AtRLP24/AT-RSH2/DLO1/PYE/ATMKP2/BIP/APRR9/SAUR37/ATRMA1/ATMPK19/NHL13/SAP2/ATNCED3/NA/NA/BIP1/ATSTP13/ATWRKY57/NA/EGR2/KFB20/AHB1/AtCIPK16/AtNPF5.2/ATMYB59/ATOXS3/ATBT4/AtIDD14/PP2CG1/NA/NA/AtKFB50/P5CS2/ATRMA2/ATCTL1/OXR2/AtDWF4
## GO:0042221 AtNIGT1/AtGDPD2/NA/STY46/ATBCA4/DIN10/B73/AtbZIP63/PIP2;3/KFB01/UGT76B1/ABF3/ARR9/ASG1/ARR4/ATNRT3.1/NA/ABS3/ATGA3OX1/COP3/NA/ACH1/STOP2/CAR9/ATWRKY18/GBF3/5PTASE2/NA/NA/NA/DHAR2/AtATL15/GIS3/ATSEN1/ARR3/VQ18/LSU3/ATGLR2.2/RHA2A/RHA2B/AT-RSH2/DLO1/ATMKP2/BIP/SAUR37/ATRMA1/SAP2/ATNCED3/NA/BIP1/ATSTP13/ATWRKY57/EGR2/KFB20/AHB1/AtNPF5.2/ATMYB59/ATOXS3/ATBT4/PP2CG1/NA/NA/AtKFB50/P5CS2/ATRMA2/ATCTL1/OXR2/AtDWF4
## GO:0051716 AtNIGT1/AtGDPD2/NA/STY46/DIN10/ATMPK18/ATRLP57/B73/AtbZIP63/KFB01/UGT76B1/ABF3/ARR9/ATWL2/ARR4/ATGA3OX1/COP3/ACH1/CAR9/NA/SDI2/NA/DHAR2/HYH/GIS3/ARR3/ATPLC4/VQ18/LSU3/ATGLR2.2/NA/RHA2A/RHA2B/RFI2/AtRLP24/PYE/ATMKP2/BIP/APRR9/ATRMA1/ATMPK19/SAP2/NA/BIP1/NA/KFB20/AHB1/AtCIPK16/ATMYB59/NA/NA/AtKFB50/ATRMA2/OXR2/AtDWF4
## GO:0010033 AtNIGT1/STY46/ATBCA4/B73/AtbZIP63/KFB01/UGT76B1/ABF3/ARR9/ASG1/ARR4/NA/ATGA3OX1/COP3/CAR9/ATWRKY18/GBF3/5PTASE2/NA/NA/NA/DHAR2/AtATL15/GIS3/ATSEN1/ARR3/VQ18/ATGLR2.2/RHA2A/RHA2B/AT-RSH2/DLO1/BIP/SAUR37/ATRMA1/NA/BIP1/ATSTP13/KFB20/AtNPF5.2/ATBT4/PP2CG1/NA/AtKFB50/P5CS2/ATRMA2/ATCTL1/AtDWF4
## GO:0070887 AtNIGT1/AtGDPD2/NA/STY46/DIN10/B73/AtbZIP63/KFB01/UGT76B1/ABF3/ARR9/ARR4/ATGA3OX1/COP3/ACH1/CAR9/NA/NA/DHAR2/GIS3/ARR3/VQ18/LSU3/ATGLR2.2/RHA2A/RHA2B/ATMKP2/BIP/ATRMA1/SAP2/BIP1/KFB20/AHB1/ATMYB59/NA/NA/AtKFB50/ATRMA2/OXR2/AtDWF4
## GO:0009719 AtNIGT1/STY46/B73/AtbZIP63/KFB01/ABF3/ARR9/ASG1/ARR4/ATGA3OX1/COP3/CAR9/GBF3/5PTASE2/NA/NA/GIS3/ATSEN1/ARR3/VQ18/ATGLR2.2/RHA2A/RHA2B/AT-RSH2/SAUR37/ATSTP13/KFB20/AtNPF5.2/ATBT4/PP2CG1/NA/AtKFB50/P5CS2/ATCTL1/AtDWF4
## GO:0007165 AtNIGT1/ATMPK18/ATRLP57/B73/KFB01/UGT76B1/ABF3/ARR9/ATWL2/ARR4/ATGA3OX1/COP3/CAR9/NA/DHAR2/HYH/GIS3/ARR3/ATPLC4/ATGLR2.2/RHA2A/RHA2B/RFI2/AtRLP24/ATMKP2/BIP/APRR9/ATMPK19/BIP1/NA/KFB20/AtCIPK16/NA/AtKFB50/AtDWF4
## GO:0009725 AtNIGT1/STY46/B73/AtbZIP63/KFB01/ABF3/ARR9/ASG1/ARR4/ATGA3OX1/COP3/CAR9/GBF3/5PTASE2/NA/NA/GIS3/ATSEN1/ARR3/VQ18/RHA2A/RHA2B/AT-RSH2/SAUR37/ATSTP13/KFB20/AtNPF5.2/ATBT4/PP2CG1/NA/AtKFB50/P5CS2/ATCTL1/AtDWF4
## GO:0071310 AtNIGT1/B73/AtbZIP63/KFB01/UGT76B1/ABF3/ARR9/ARR4/ATGA3OX1/COP3/CAR9/NA/NA/DHAR2/GIS3/ARR3/VQ18/ATGLR2.2/RHA2A/RHA2B/BIP/ATRMA1/BIP1/KFB20/NA/AtKFB50/ATRMA2/AtDWF4
## GO:0033993 AtNIGT1/STY46/AtbZIP63/ABF3/ASG1/ATGA3OX1/CAR9/GBF3/5PTASE2/NA/NA/GIS3/ATSEN1/VQ18/RHA2A/RHA2B/AT-RSH2/ATSTP13/AtNPF5.2/ATBT4/PP2CG1/NA/P5CS2/AtDWF4
## GO:0071495 AtNIGT1/B73/AtbZIP63/KFB01/ABF3/ARR9/ARR4/ATGA3OX1/COP3/CAR9/NA/GIS3/ARR3/VQ18/ATGLR2.2/RHA2A/RHA2B/KFB20/NA/AtKFB50/AtDWF4
## GO:0032870 AtNIGT1/B73/AtbZIP63/KFB01/ABF3/ARR9/ARR4/ATGA3OX1/COP3/CAR9/NA/GIS3/ARR3/VQ18/RHA2A/RHA2B/KFB20/NA/AtKFB50/AtDWF4
## GO:1901698 AtNIGT1/ATNRT3.1/ACH1/NA/NA/ATGLR2.2/BIP/ATRMA1/NA/BIP1/AtNPF5.2/ATRMA2/ATCTL1
## Count
## GO:0050896 98
## GO:0042221 68
## GO:0051716 55
## GO:0010033 48
## GO:0070887 40
## GO:0009719 35
## GO:0007165 35
## GO:0009725 34
## GO:0071310 28
## GO:0033993 24
## GO:0071495 21
## GO:0032870 20
## GO:1901698 13
## [1] "LASSO 990 genes, 0-0.2 \n"
## [1] ID Description GeneRatio BgRatio pvalue p.adjust
## [7] qvalue geneID Count
## <0 rows> (or 0-length row.names)
## [1] "RF 859 genes, min mse at 0-0.2"
## [1] ID Description GeneRatio BgRatio pvalue p.adjust
## [7] qvalue geneID Count
## <0 rows> (or 0-length row.names)
## [1] "LASSO 279 genes, 0.3-0.6 \n"
## [1] ID Description GeneRatio BgRatio pvalue p.adjust
## [7] qvalue geneID Count
## <0 rows> (or 0-length row.names)
## [1] "RF 295 genes, min mse at 0.3-0.6"
## ID Description GeneRatio BgRatio
## GO:0048731 GO:0048731 system development 50/256 154/1192
## GO:0000003 GO:0000003 reproduction 40/256 114/1192
## GO:0022414 GO:0022414 reproductive process 40/256 114/1192
## GO:0006955 GO:0006955 immune response 22/256 50/1192
## GO:0002376 GO:0002376 immune system process 22/256 51/1192
## GO:0098542 GO:0098542 defense response to other organism 20/256 46/1192
## pvalue p.adjust qvalue
## GO:0048731 0.0004408213 0.04254626 0.04093893
## GO:0000003 0.0002974878 0.03801988 0.03658354
## GO:0022414 0.0002974878 0.03801988 0.03658354
## GO:0006955 0.0002189647 0.03801988 0.03658354
## GO:0002376 0.0003110010 0.03801988 0.03658354
## GO:0098542 0.0005220400 0.04254626 0.04093893
## geneID
## GO:0048731 ABCG14/GAPCP-2/ASL39/PTL/AAE3/GAT/DVL4/UMAMIT11/MDH/CYP78A9/MEE49/MAKR5/AHK1/ROT3/MEE31/DPG1/RGA/ACH2/AFB3/AtLARP1b/AtMAX2/BLH2/At2-MMP/RBL/RBL/ACBP/ATXTH28/PIPL3/BZR1/emb1379/ATXIB/ATMYB68/ATWRKY2/AGC1-1/AHL29/ATERS/ExAD/OVA1/UMAMIT14/BPE/IAA30/GRP23/WOX5/ATPRMT10/AtCLE6/ANAC029/MUCI70/HAE/ADO3/ATMYB61
## GO:0000003 ABCG14/PTL/AAE3/UMAMIT11/MDH/CYP78A9/MEE49/AHK1/ROT3/MEE31/DPG1/RGA/AFB3/At2-MMP/ARK3/APK3/RBL/RBL/ACBP/ATXTH28/BZR1/emb1379/ATCBL9/BUBR1/ATWRKY2/AHL29/ATERS/OVA1/ATB'/UMAMIT14/BPE/IAA30/GRP23/ATPRMT10/ANAC029/EDA5/MUCI70/HAE/ADO3/ATMYB61
## GO:0022414 ABCG14/PTL/AAE3/UMAMIT11/MDH/CYP78A9/MEE49/AHK1/ROT3/MEE31/DPG1/RGA/AFB3/At2-MMP/ARK3/APK3/RBL/RBL/ACBP/ATXTH28/BZR1/emb1379/ATCBL9/BUBR1/ATWRKY2/AHL29/ATERS/OVA1/ATB'/UMAMIT14/BPE/IAA30/GRP23/ATPRMT10/ANAC029/EDA5/MUCI70/HAE/ADO3/ATMYB61
## GO:0006955 ABCG14/AAE3/OBF4/MDH/ATTLP1/EIF(ISO)4E/POQR/AT-POX/PUB25/CSA1/PBL36/At2-MMP/MOS4/MPL1/NA/HR3/NA/SNIPER4/LecRK-I.10/ATEXO70E2/HAE/AtMYB28
## GO:0002376 ABCG14/AAE3/OBF4/MDH/ATTLP1/EIF(ISO)4E/POQR/AT-POX/PUB25/CSA1/PBL36/At2-MMP/MOS4/MPL1/NA/HR3/NA/SNIPER4/LecRK-I.10/ATEXO70E2/HAE/AtMYB28
## GO:0098542 ABCG14/AAE3/OBF4/MDH/ATTLP1/EIF(ISO)4E/POQR/AT-POX/PUB25/CSA1/At2-MMP/MOS4/MPL1/NA/HR3/NA/SNIPER4/LecRK-I.10/HAE/AtMYB28
## Count
## GO:0048731 50
## GO:0000003 40
## GO:0022414 40
## GO:0006955 22
## GO:0002376 22
## GO:0098542 20
go_all <- enrich_go(convert_from_agi(genes), convert_from_agi(rownames(gene_annotations$`Arabidopsis thaliana`)))
draw_enrich_go(go_all)
Rien de spécial n’est trouvé en lien avec les ontologies enrichies dans certains groupes, la liste de base est enrichie en ribosomal functions, et ça se retrouve dans des sous groupes comparés à l’ensemble du génome. comparés à la liste de base, les sous groups ne sont quasiment pas enrichis…
expression <- data.frame(counts)
expression <- (expression-rowMeans(expression)) / matrixStats::rowSds(as.matrix(expression))
Heatmap(expression, cluster_columns = F, show_row_names = F)+ rowAnnotation(
clusters_lasso = clusters_lasso, clusters_rf = clusters_rf,
col=list(clusters_lasso= setNames(c("darkorange", "yellow", "green"),
nm = names(table(clusters_lasso))),
clusters_rf= setNames(c("darkorange", "yellow", "green"),
nm = names(table(clusters_rf)))))
expression$genes <- rownames(expression)
reshape2::melt(expression) %>%
mutate(time = extract_numeric(str_split_fixed(variable, '_', 2)[,1]),
rep = str_split_fixed(variable, '_', 2)[,2],
trt = substr(variable,1,1),
group_lasso = clusters_lasso[genes],
group_rf = clusters_rf[genes]) %>%
filter(trt!="T")%>%
ggplot(aes(x=time, y=value, col = group_lasso, group =genes))+
geom_line(alpha = 0.1) +
facet_wrap(group_lasso~ trt, nrow = 1)
## Using genes as id variables
## extract_numeric() is deprecated: please use readr::parse_number() instead
reshape2::melt(expression) %>%
mutate(time = extract_numeric(str_split_fixed(variable, '_', 2)[,1]),
rep = str_split_fixed(variable, '_', 2)[,2],
trt = substr(variable,1,1),
group_lasso = clusters_lasso[genes],
group_rf = clusters_rf[genes]) %>%
filter(trt!="T")%>%
ggplot(aes(x=time, y=value, col = group_rf, group =genes))+
geom_line(alpha = 0.1) +
facet_wrap(group_rf~ trt, nrow = 1)
## Using genes as id variables
## extract_numeric() is deprecated: please use readr::parse_number() instead